# Replication files for:
# "Generalizability of Heterogeneous Treatment Effect Estimates Across Samples"
# Alexander Coppock, Thomas J. Leeper, and Kevin J. Mullinix
# Proceedings of the National Academy of Sciences, Forthcoming

rm(list = ls())
library(dplyr)
library(readr)
library(xtable)
library(deming)
library(broom)

# load data ---------------------------------------------------------------

scatter_df <- read_rds("CLM_scatter_df.rds")

# Helper functions --------------------------------------------------------

tidy.deming <- function(fit){
  dat <- data.frame(
    names(fit$coefficients),
    fit$coefficients, 
    fit$se, 
    fit$ci)
    colnames(dat) <- c("term", "estimate", "std.error", "conf.low", "conf.high")
    rownames(dat) <- NULL
    dat
}

add_parens <- function(x, digits = 2) {
  x <- as.numeric(x)
  return(paste0("(", sprintf(paste0("%.", digits, "f"), x), ")"))
}

format_num <- function(x, digits = 2) {
  x <- as.numeric(x)
  return(paste0(sprintf(paste0("%.", digits, "f"), x)))
}


# Estimate deming regressions by covariate group --------------------------

set.seed(343)
group_deming <-
  scatter_df %>%
  group_by(group_label) %>%
  do(tidy(
    deming(
      estimate_original ~ estimate_mt,
      ystd = std.error_original,
      xstd = std.error_mt,
      data = .
    )
  )) %>%
  filter(term == "estimate_mt") %>%
  ungroup


group_deming <-
  group_deming %>%
  transmute(
    group_label,
    est = paste0(format_num(estimate), " ", add_parens(std.error)),
    ci = paste0("[", format_num(conf.low), ", ", format_num(conf.high), "]")
  )

group_table <-
  scatter_df %>%
  group_by(group_label) %>%
  summarize(n = n())

group_table <- left_join(group_deming, group_table)

group_table %>%
  xtable %>%
  print.xtable(
    include.colnames = FALSE,
    include.rownames = FALSE,
    hline.after = c(),
    only.contents = TRUE,
    file = "group_table.tex")

# in text number
# The slopes are all positive, ranging from 0.71 to 1.01.
group_deming %>% summarize(min(estimate), max(estimate))

# # A tibble: 1 x 2
# `min(estimate)` `max(estimate)`
# <dbl>           <dbl>
#   1           0.713            1.01


# Estimate deming regressions by study ------------------------------------

set.seed(343)
study_deming <-
  scatter_df %>%
  group_by(study_label) %>%
  do(tidy(deming(estimate_original ~ estimate_mt, 
                 ystd = std.error_original, xstd = std.error_mt, data = .))) %>%
  filter(term == "estimate_mt") %>% 
  ungroup %>%
  transmute(study_label, 
            est = paste0(format_num(estimate), " ", add_parens(std.error)),
            ci = paste0("[", format_num(conf.low), ", ", format_num(conf.high), "]"))

study_table <-
  scatter_df %>%
  group_by(study_label) %>%
  summarize(
    n = n()
  )

study_ns_df <- read_rds("CLM_study_ns_df.rds")
f_tests_df <- readr::read_rds("CLM_f_tests_df.rds")

study_table <- 
  study_ns_df %>% 
  left_join(f_tests_df) %>%
  left_join(study_table) %>%
  left_join(study_deming) %>%
  select(study_label, original_n, mt_n, est, ci, n, p_value) %>% 
  arrange(study_label)

# Claim from abstract
# We analyze subgroup conditional average treatment effects 
# using 27 original-replication study pairs 
# (encompassing 101,745 individual survey responses)
sum(study_table$original_n) + sum(study_table$mt_n)

study_table %>%
  xtable %>%
  print.xtable(
    include.colnames = FALSE,
    include.rownames = FALSE,
    hline.after = c(),
    only.contents = TRUE,
    file = "study_table.tex")
